{-# OPTIONS_GHC -fno-warn-orphans #-}

module Text.Megaparsec.CharSpec (spec) where

import Control.Monad
import qualified Data.CaseInsensitive as CI
import Data.Char
import Data.List (isPrefixOf, nub, partition)
import Test.Hspec
import Test.Hspec.Megaparsec
import Test.Hspec.Megaparsec.AdHoc
import Test.QuickCheck
import Text.Megaparsec
import Text.Megaparsec.Char

instance Arbitrary GeneralCategory where
  arbitrary = elements [minBound .. maxBound]

spec :: Spec
spec = do
  describe "newline" $
    checkStrLit "newline" "\n" (pure <$> newline)

  describe "csrf" $
    checkStrLit "crlf newline" "\r\n" crlf

  describe "eol" $ do
    context "when stream begins with a newline" $
      it "succeeds returning the newline" $
        property $ \s -> do
          let s' = '\n' : s
          prs eol s' `shouldParse` "\n"
          prs' eol s' `succeedsLeaving` s
    context "when stream begins with CRLF sequence" $
      it "parses the CRLF sequence" $
        property $ \s -> do
          let s' = '\r' : '\n' : s
          prs eol s' `shouldParse` "\r\n"
          prs' eol s' `succeedsLeaving` s
    context "when stream begins with '\\r', but it's not followed by '\\n'" $
      it "signals correct parse error" $
        property $ \ch ->
          ch /= '\n' ==> do
            let s = ['\r', ch]
            prs eol s `shouldFailWith` err 0 (utoks s <> elabel "end of line")
    context "when input stream is '\\r'" $
      it "signals correct parse error" $
        prs eol "\r"
          `shouldFailWith` err
            0
            (utok '\r' <> elabel "end of line")
    context "when stream does not begin with newline or CRLF sequence" $
      it "signals correct parse error" $
        property $ \ch s ->
          (ch `notElem` "\r\n") ==> do
            let s' = ch : s
            prs eol s'
              `shouldFailWith` err
                0
                (utoks (take 2 s') <> elabel "end of line")
    context "when stream is empty" $
      it "signals correct parse error" $
        prs eol ""
          `shouldFailWith` err
            0
            (ueof <> elabel "end of line")

  describe "tab" $
    checkStrLit "tab" "\t" (pure <$> tab)

  describe "space" $
    it "consumes space up to first non-space character" $
      property $ \s' -> do
        let (s0, s1) = partition isSpace s'
            s = s0 ++ s1
        prs space s `shouldParse` ()
        prs' space s `succeedsLeaving` s1

  describe "hspace" $
    it "consumes space up to first non-space character" $
      property $ \s' -> do
        let (s0, s1) = partition isHSpace s'
            s = s0 ++ s1
        prs hspace s `shouldParse` ()
        prs' hspace s `succeedsLeaving` s1

  describe "space1" $ do
    context "when stream does not start with a space character" $
      it "signals correct parse error" $
        property $ \ch s' ->
          not (isSpace ch) ==> do
            let (s0, s1) = partition isSpace s'
                s = ch : s0 ++ s1
            prs space1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
            prs' space1 s `failsLeaving` s
    context "when stream starts with a space character" $
      it "consumes space up to first non-space character" $
        property $ \s' -> do
          let (s0, s1) = partition isSpace s'
              s = ' ' : s0 ++ s1
          prs space1 s `shouldParse` ()
          prs' space1 s `succeedsLeaving` s1
    context "when stream is empty" $
      it "signals correct parse error" $
        prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")

  describe "hspace1" $ do
    context "when stream does not start with a space character" $
      it "signals correct parse error" $
        property $ \ch s' ->
          not (isHSpace ch) ==> do
            let (s0, s1) = partition isHSpace s'
                s = ch : s0 ++ s1
            prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
            prs' hspace1 s `failsLeaving` s
    context "when stream starts with a space character" $
      it "consumes space up to first non-space character" $
        property $ \s' -> do
          let (s0, s1) = partition isHSpace s'
              s = ' ' : s0 ++ s1
          prs hspace1 s `shouldParse` ()
          prs' hspace1 s `succeedsLeaving` s1
    context "when stream is empty" $
      it "signals correct parse error" $
        prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")

  describe "controlChar" $
    checkCharPred "control character" isControl controlChar

  describe "spaceChar" $
    checkCharRange "white space" " \160\t\n\r\f\v" spaceChar

  describe "upperChar" $
    checkCharPred "uppercase letter" isUpper upperChar

  describe "lowerChar" $
    checkCharPred "lowercase letter" isLower lowerChar

  describe "letterChar" $
    checkCharPred "letter" isAlpha letterChar

  describe "printChar" $
    checkCharPred "printable character" isPrint printChar

  describe "digitChar" $
    checkCharRange "digit" ['0' .. '9'] digitChar

  describe "binDigitChar" $
    checkCharRange "binary digit" ['0' .. '1'] binDigitChar

  describe "octDigitChar" $
    checkCharRange "octal digit" ['0' .. '7'] octDigitChar

  describe "hexDigitChar" $
    checkCharRange "hexadecimal digit" (['0' .. '9'] ++ ['a' .. 'f'] ++ ['A' .. 'F']) hexDigitChar

  describe "markChar" $
    checkCharRange "mark character" "\71229\7398" markChar

  describe "numberChar" $
    let xs = "\185\178\179\188\189\190" ++ ['0' .. '9']
     in checkCharRange "numeric character" xs numberChar

  describe "punctuationChar" $
    checkCharPred "punctuation" isPunctuation punctuationChar

  describe "symbolChar" $
    checkCharRange "symbol" "<>$£`~|×÷^®°¸¯=¬+¤±¢¨´©¥¦" symbolChar
  describe "separatorChar" $
    checkCharRange "separator" " \160" separatorChar

  describe "asciiChar" $
    checkCharPred "ASCII character" isAscii asciiChar

  describe "latin1Char" $ do
    context "when stream begins with Latin-1 character" $
      it "parses the Latin-1 character" $
        property $ \ch s ->
          isLatin1 ch ==> do
            let s' = ch : s
            prs latin1Char s' `shouldParse` ch
            prs' latin1Char s' `succeedsLeaving` s
    context "when stream does not begin with Latin-1 character" $
      it "signals correct parse error" $
        do
          prs latin1Char "б"
            `shouldFailWith` err 0 (utok 'б' <> elabel "Latin-1 character")
          prs' latin1Char "в" `failsLeaving` "в"
    context "when stream is empty" $
      it "signals correct parse error" $
        prs latin1Char "" `shouldFailWith` err 0 (ueof <> elabel "Latin-1 character")

  describe "charCategory" $ do
    context "when parser corresponding to general category of next char is used" $
      it "succeeds" $
        property $ \ch s -> do
          let s' = ch : s
              g = generalCategory ch
          prs (charCategory g) s' `shouldParse` ch
          prs' (charCategory g) s' `succeedsLeaving` s
    context "when parser's category does not match next character's category" $
      it "fails" $
        property $ \g ch s ->
          (generalCategory ch /= g) ==> do
            let s' = ch : s
            prs (charCategory g) s'
              `shouldFailWith` err 0 (utok ch <> elabel (categoryName g))
            prs' (charCategory g) s' `failsLeaving` s'
    context "when stream is empty" $
      it "signals correct parse error" $
        property $ \g ->
          prs (charCategory g) ""
            `shouldFailWith` err 0 (ueof <> elabel (categoryName g))

  describe "char" $ do
    context "when stream begins with the character specified as argument" $
      it "parses the character" $
        property $ \ch s -> do
          let s' = ch : s
          prs (char ch) s' `shouldParse` ch
          prs' (char ch) s' `succeedsLeaving` s
    context "when stream does not begin with the character specified as argument" $
      it "signals correct parse error" $
        property $ \ch ch' s ->
          ch /= ch' ==> do
            let s' = ch' : s
            prs (char ch) s' `shouldFailWith` err 0 (utok ch' <> etok ch)
            prs' (char ch) s' `failsLeaving` s'
    context "when stream is empty" $
      it "signals correct parse error" $
        property $ \ch ->
          prs (char ch) "" `shouldFailWith` err 0 (ueof <> etok ch)

  describe "char'" $ do
    context "when stream begins with the character specified as argument" $ do
      it "parses the character" $
        property $ \ch s -> do
          let sl = toLower ch : s
              su = toUpper ch : s
              st = toTitle ch : s
          prs (char' ch) sl `shouldParse` toLower ch
          prs (char' ch) su `shouldParse` toUpper ch
          prs (char' ch) st `shouldParse` toTitle ch
          prs' (char' ch) sl `succeedsLeaving` s
          prs' (char' ch) su `succeedsLeaving` s
      context "when the character is not upper or lower" $
        -- See https://ghc.haskell.org/trac/ghc/ticket/14589
        it "matches it against a form obtained via one of the conversion functions" $
          property $ \s -> do
            let ch = '\9438'
                s' = '\9412' : s
            prs (char' ch) s' `shouldParse` '\9412'
            prs' (char' ch) s' `succeedsLeaving` s
    context "when stream does not begin with the character specified as argument" $ do
      it "signals correct parse error" $
        property $ \ch ch' s ->
          not (casei ch ch') ==> do
            let s' = ch' : s
                ms = utok ch' <> etok (toLower ch) <> etok (toUpper ch) <> etok (toTitle ch)
            prs (char' ch) s' `shouldFailWith` err 0 ms
            prs' (char' ch) s' `failsLeaving` s'
      context "when the character is not upper or lower" $
        it "lists correct options in the error message" $
          property $ \ch s ->
            not (casei '\9438' ch) ==> do
              let ms = utok ch <> etok '\9438' <> etok '\9412'
                  s' = ch : s
              prs (char' '\9438') s' `shouldFailWith` err 0 ms
    context "when stream is empty" $
      it "signals correct parse error" $
        property $ \ch -> do
          let options = etok <$> [toLower ch, toTitle ch, toUpper ch]
              ms = ueof <> mconcat (nub options)
          prs (char' ch) "" `shouldFailWith` err 0 ms

  describe "string" $ do
    context "when stream is prefixed with given string" $
      it "parses the string" $
        property $ \str s -> do
          let s' = str ++ s
          prs (string str) s' `shouldParse` str
          prs' (string str) s' `succeedsLeaving` s
    context "when stream is not prefixed with given string" $
      it "signals correct parse error" $
        property $ \str s ->
          not (str `isPrefixOf` s) ==> do
            let us = take (length str) s
            prs (string str) s `shouldFailWith` err 0 (utoks us <> etoks str)

  describe "string'" $ do
    context "when stream is prefixed with given string" $
      it "parses the string" $
        property $ \str s ->
          forAll (fuzzyCase str) $ \str' -> do
            let s' = str' ++ s
            -- Rare tricky cases we don't want to deal with.
            when (CI.mk str /= CI.mk str') discard
            prs (string' str) s' `shouldParse` str'
            prs' (string' str) s' `succeedsLeaving` s
    context "when stream is not prefixed with given string" $
      it "signals correct parse error" $
        property $ \str s ->
          not (str `isPrefixOfI` s) ==> do
            let us = take (length str) s
            prs (string' str) s `shouldFailWith` err 0 (utoks us <> etoks str)

----------------------------------------------------------------------------
-- Helpers

checkStrLit :: String -> String -> Parser String -> SpecWith ()
checkStrLit name ts p = do
  context ("when stream begins with " ++ name) $
    it ("parses the " ++ name) $
      property $ \s -> do
        let s' = ts ++ s
        prs p s' `shouldParse` ts
        prs' p s' `succeedsLeaving` s
  context ("when stream does not begin with " ++ name) $
    it "signals correct parse error" $
      property $ \ch s ->
        ch /= head ts ==> do
          let s' = ch : s
              us = take (length ts) s'
          prs p s' `shouldFailWith` err 0 (utoks us <> etoks ts)
          prs' p s' `failsLeaving` s'
  context "when stream is empty" $
    it "signals correct parse error" $
      prs p "" `shouldFailWith` err 0 (ueof <> etoks ts)

checkCharPred :: String -> (Char -> Bool) -> Parser Char -> SpecWith ()
checkCharPred name f p = do
  context ("when stream begins with " ++ name) $
    it ("parses the " ++ name) $
      property $ \ch s ->
        f ch ==> do
          let s' = ch : s
          prs p s' `shouldParse` ch
          prs' p s' `succeedsLeaving` s
  context ("when stream does not begin with " ++ name) $
    it "signals correct parse error" $
      property $ \ch s ->
        not (f ch) ==> do
          let s' = ch : s
          prs p s' `shouldFailWith` err 0 (utok ch <> elabel name)
          prs' p s' `failsLeaving` s'
  context "when stream is empty" $
    it "signals correct parse error" $
      prs p "" `shouldFailWith` err 0 (ueof <> elabel name)

checkCharRange :: String -> String -> Parser Char -> SpecWith ()
checkCharRange name tchs p = do
  forM_ tchs $ \tch ->
    context ("when stream begins with " ++ showTokens sproxy (nes tch)) $
      it ("parses the " ++ showTokens sproxy (nes tch)) $
        property $ \s -> do
          let s' = tch : s
          prs p s' `shouldParse` tch
          prs' p s' `succeedsLeaving` s
  context "when stream is empty" $
    it "signals correct parse error" $
      prs p "" `shouldFailWith` err 0 (ueof <> elabel name)

-- | Randomly change the case in the given string.
fuzzyCase :: String -> Gen String
fuzzyCase s = zipWith f s <$> vector (length s)
  where
    f k True = if isLower k then toUpper k else toLower k
    f k False = k

-- | The 'isPrefixOf' function takes two 'String's and returns 'True' iff
-- the first list is a prefix of the second with case-insensitive
-- comparison.
isPrefixOfI :: String -> String -> Bool
isPrefixOfI [] _ = True
isPrefixOfI _ [] = False
isPrefixOfI (x : xs) (y : ys) = x `casei` y && isPrefixOf xs ys

-- | Case-insensitive equality test for characters.
casei :: Char -> Char -> Bool
casei x y =
  x == toLower y
    || x == toUpper y
    || x == toTitle y

-- | Is it a horizontal space character?
isHSpace :: Char -> Bool
isHSpace x = isSpace x && x /= '\n' && x /= '\r'
